In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

Read the files and save them as DataFrame objects

In [2]:
# Read all files.
# NOTE(review): absolute local Windows path — consider a configurable DATA_DIR.
path1 = 'C:/Users/gochicken/SisFall_dataset/SisFall_dataset/SAnumber'
path2 = 'C:/Users/gochicken/SisFall_dataset/SisFall_dataset/SEnumber'  # Change the route to your dataset location


def make_paths(template, count):
    """Expand 'number' in `template` to zero-padded subject ids '01'..'<count>'."""
    return [template.replace('number', str(n).zfill(2)) for n in range(1, count + 1)]


# SA01..SA23 first; the falls below were recorded for these subjects.
path_list = make_paths(path1, 23)

# Loaded frames are stored in globals() under '<subject><suffix>' so the
# later cells can keep addressing them by name through `g`.
g = globals()


def load_trials(paths, file_code, suffix):
    """Read one trial file per subject and register it as a global DataFrame.

    For each path, reads '<path>/<file_code>_<subject>_R01.txt' (headerless),
    strips the trailing ';' from the last column and casts it to int, stores
    the frame in globals() under '<subject><suffix>', and returns the list of
    created object names.
    """
    names = []
    for p in paths:
        subject = p.split('/')[-1]
        name = subject + suffix
        df = pd.read_csv(p + '/' + file_code + '_' + subject + '_R01.txt', header=None)
        df.iloc[:, -1] = df.iloc[:, -1].str.replace(';', '').astype('int')
        g[name] = df
        names.append(name)
    return names


# Save to objects and divide object names into four groups.
slipfall_objects = load_trials(path_list, 'F01', 'slipfall')
tripfall_objects = load_trials(path_list, 'F04', 'tripfall')

# Add the elderly subjects (SE01..SE15) before loading the walking trials,
# which every participant performed.
path_list += make_paths(path2, 15)

walking_slowly_objects = load_trials(path_list, 'D01', 'walking_slowly')
walking_quickly_objects = load_trials(path_list, 'D02', 'walking_quickly')

    
     
    

Normalize the raw data to physical units (g, °/s) and convert every DataFrame to a NumPy ndarray

In [3]:
# Convert raw ADC counts to physical units using scale = 2*range / 2**resolution:
#   columns 0-2: accelerometer, +/-16 g, 13-bit      -> g   (ADXL345 per later cells)
#   columns 3-5: gyroscope, +/-2000 deg/s, 16-bit    -> deg/s
#   columns 6-8: accelerometer, +/-8 g, 14-bit       -> g
ACC1_SCALE = (2 * 16) / (2 ** 13)
GYRO_SCALE = (2 * 2000) / (2 ** 16)
ACC2_SCALE = (2 * 8) / (2 ** 14)

for object_name in slipfall_objects + tripfall_objects + walking_slowly_objects + walking_quickly_objects:
    df = g[object_name]
    # Plain column arithmetic is clearer and faster than .apply with a lambda.
    df.iloc[:, :3] = df.iloc[:, :3] * ACC1_SCALE
    df.iloc[:, 3:6] = df.iloc[:, 3:6] * GYRO_SCALE
    df.iloc[:, 6:] = df.iloc[:, 6:] * ACC2_SCALE
    # Downstream cells index these objects as NumPy arrays.
    g[object_name] = df.to_numpy()

Extract the first 15 seconds of the walking-slowly and walking-quickly trials

In [4]:
# Extract the first 15 s of every walking trial.
# At 200 Hz, 15 s = 3000 samples.
walking_slowly_objects_1st15s = []
walking_quickly_objects_1st15s = []
for source_names, target in ((walking_slowly_objects, walking_slowly_objects_1st15s),
                             (walking_quickly_objects, walking_quickly_objects_1st15s)):
    for object_name in source_names:
        g[object_name + '_1st15s'] = g[object_name][:3000, :]
        target.append(object_name + '_1st15s')

slow walking vs fast walking (ADXL345 data)

In [5]:
def plot_triaxial(ax, data, title, sampling_rate=200):
    """Plot the x/y/z channels of `data` (samples x channels) against time in seconds.

    Axis limits are fixed to +/-4 (accelerations in g) so all subplots are
    directly comparable.
    """
    t = np.arange(data.shape[0]) / sampling_rate
    for channel, color, label in ((0, 'blue', 'x'), (1, 'green', 'y'), (2, 'red', 'z')):
        ax.plot(t, data[:, channel], c=color, label=label)
    ax.set_ylim(-4, 4)
    ax.set_title(title)
    ax.legend()


# 38 rows x 2 columns: slow walking on the left, quick walking on the right.
fig = plt.figure(figsize=(16, 150))
for n, object_name in enumerate(walking_slowly_objects_1st15s, start=1):
    plot_triaxial(fig.add_subplot(38, 2, 2 * n - 1), g[object_name], object_name)
for n, object_name in enumerate(walking_quickly_objects_1st15s, start=1):
    plot_triaxial(fig.add_subplot(38, 2, 2 * n), g[object_name], object_name)
plt.show()

It seems that not every participant starts walking immediately after the accelerometer recording begins — for example SA03, SE04 and SE05 wait for several seconds after the order. Therefore it is necessary to parse the time-series data starting from the 6th second.

Parse time series data after the 6th second

In [6]:
# Re-slice every walking trial: samples 1200:4200 at 200 Hz is the 15 s window
# from t = 6 s to t = 21 s, skipping the initial seconds during which some
# participants were still standing.
walking_slowly_objects_15s = []
walking_quickly_objects_15s = []
for source_names, target in ((walking_slowly_objects, walking_slowly_objects_15s),
                             (walking_quickly_objects, walking_quickly_objects_15s)):
    for object_name in source_names:
        g[object_name + '_15s'] = g[object_name][1200:4200, :]
        target.append(object_name + '_15s')
In [7]:
# Same pairwise layout as before: slow walking left, quick walking right.
fig = plt.figure(figsize=(16, 150))
t = np.arange(3000) / 200  # seconds at 200 Hz

for n, object_name in enumerate(walking_slowly_objects_15s, start=1):
    ax = fig.add_subplot(38, 2, 2 * n - 1)
    for channel, (color, label) in enumerate((('blue', 'x'), ('green', 'y'), ('red', 'z'))):
        ax.plot(t, g[object_name][:, channel], c=color, label=label)
    ax.set_ylim(-4, 4)
    ax.set_title(object_name)
    ax.legend()

for n, object_name in enumerate(walking_quickly_objects_15s, start=1):
    ax = fig.add_subplot(38, 2, 2 * n)
    for channel, (color, label) in enumerate((('blue', 'x'), ('green', 'y'), ('red', 'z'))):
        ax.plot(t, g[object_name][:, channel], c=color, label=label)
    ax.set_ylim(-4, 4)
    ax.set_title(object_name)
    ax.legend()
plt.show()

By comparing the pairwise figures, I think the quick-ambulation group shows more intense crests, especially in the y-axis signal (green line), which indicates a higher cadence. So I'm going to see if we can count the number of peaks per second. I referenced the gait-dynamics extraction process of the sensormotion package on GitHub: https://github.com/sho-87/sensormotion

Noise filtering

In [8]:
import sensormotion as sm

# Time vector in milliseconds: at 200 Hz one sample arrives every 5 ms.
sampling_rate = 200
seconds = 15
time = np.arange(0, seconds * sampling_rate) * 5

# 4th-order low-pass filter with a 5 Hz cut-off frequency, as in the
# previous research reported.
b, a = sm.signal.build_filter(5, sampling_rate, 'low', filter_order=4)

# Filter every object in the walking_slowly and walking_quickly lists and
# plot them in the usual pairwise layout (slow left column, quick right).
filtered_objects_ADXL345 = []
fig = plt.figure(figsize=(16, 150))
for column_offset, object_list in ((-1, walking_slowly_objects_15s),
                                   (0, walking_quickly_objects_15s)):
    for n, object_name in enumerate(object_list, start=1):
        # Filter each axis independently, then stack back to (samples, 3).
        filtered = np.stack(
            [sm.signal.filter_signal(b, a, g[object_name][:, axis]) for axis in range(3)],
            axis=-1)
        g[object_name + '_f'] = filtered
        filtered_objects_ADXL345.append(object_name + '_f')

        ax = fig.add_subplot(38, 2, 2 * n + column_offset)
        t = np.arange(3000) / 200  # seconds at 200 Hz
        ax.plot(t, filtered[:, 0], c='blue', label='x')
        ax.plot(t, filtered[:, 1], c='green', label='y')
        ax.plot(t, filtered[:, 2], c='red', label='z')
        ax.set_ylim(-4, 4)
        ax.set_title(object_name)
        ax.legend()

plt.show()

Counting peaks

In [9]:
# Detect steps as valleys in the filtered y-axis signal and derive cadence.
# min_dist=70 and min_val=0.45 were tuned by hand: there is no obvious way to
# set thresholds for minimal interval and normalized amplitude a priori, and
# after several attempts these values captured nearly all authentic peaks.
cadence_grouped = {}
for trial_name in filtered_objects_ADXL345:
    valley_times, _ = sm.peak.find_peaks(time, g[trial_name][:, 1], peak_type='valley',
                                         min_dist=70, min_val=0.45, plot=True)
    cadence_grouped[trial_name] = sm.gait.cadence(time, valley_times)

Visualize the Cadence between walking speed groups

In [10]:
# Turn the {trial name: cadence} dict into a DataFrame and label each trial
# with the participant's age group (SA prefix = young, SE prefix = elderly)
# and walking mode parsed from the object name.
cadence_grouped = pd.DataFrame.from_dict(cadence_grouped, orient='index', columns=['cadence'])
cadence_grouped['age'] = np.where(cadence_grouped.index.str.contains('SA'), 'young', 'elderly')
cadence_grouped['mode'] = np.where(cadence_grouped.index.str.contains('slowly'), 'slowly', 'quickly')

cadence_grouped  # a dataframe including cadence for all participants
Out[10]:
cadence age mode
SA01walking_slowly_15s_f 96.032011 young slowly
SA02walking_slowly_15s_f 104.034678 young slowly
SA03walking_slowly_15s_f 108.036012 young slowly
SA04walking_slowly_15s_f 92.030677 young slowly
SA05walking_slowly_15s_f 88.029343 young slowly
... ... ... ...
SE11walking_quickly_15s_f 124.041347 elderly quickly
SE12walking_quickly_15s_f 116.038680 elderly quickly
SE13walking_quickly_15s_f 120.040013 elderly quickly
SE14walking_quickly_15s_f 116.038680 elderly quickly
SE15walking_quickly_15s_f 136.045348 elderly quickly

76 rows × 3 columns

In [11]:
#draw a boxplot
import seaborn as sns
ax=sns.boxplot(data=cadence_grouped,x='mode',y='cadence',hue='age',palette="Set2")

Except for outliers, I found that in each within-group (young vs elderly) and between-group (slowly vs quickly) comparison, the lower quartile of one group lies above the upper quartile of the other. This means that when using cadence as a filtering threshold we should set different values for young and elderly participants. Based on the boxplot results, I think a cadence of 100.033 for the young group and 112.037 for the elderly group is appropriate for discriminating slow from quick ambulation.

In [12]:
# See if we can define a filtering value: compare the slow group's upper
# quartile with the quick group's lower quartile within each age group.
def _cadence_quartile(age, mode, quartile):
    """Return the `quartile` ('25%'/'75%') row of describe() for the cadence of
    the given age/mode subgroup. Label-based .loc is used instead of positional
    .iloc so the lookup does not silently break if describe() changes shape."""
    subgroup = cadence_grouped[(cadence_grouped['age'] == age) & (cadence_grouped['mode'] == mode)]
    return subgroup.describe().loc[quartile]

print(_cadence_quartile('young', 'slowly', '75%'))     # 'young slowly' upper quartile
print(_cadence_quartile('young', 'quickly', '25%'))    # 'young quickly' lower quartile
print(_cadence_quartile('elderly', 'slowly', '75%'))   # 'elderly slowly' upper quartile
print(_cadence_quartile('elderly', 'quickly', '25%'))  # 'elderly quickly' lower quartile
cadence    96.032011
Name: 75%, dtype: float64
cadence    112.037346
Name: 25%, dtype: float64
cadence    104.034678
Name: 75%, dtype: float64
cadence    116.03868
Name: 25%, dtype: float64
In [13]:
# I'll take the average of the slow group's upper quartile and the quick
# group's lower quartile as the filtering value. Computing it from the data
# avoids re-typing numbers from a previous cell's output (the original
# hard-coded 96.0320119, a mistranscription of the printed 96.032011).
def _cadence_cutoff(age):
    """Midpoint between the 75th percentile of slow walking and the 25th
    percentile of quick walking for the given age group."""
    by_mode = cadence_grouped[cadence_grouped['age'] == age].groupby('mode')['cadence']
    return (by_mode.quantile(0.75).loc['slowly'] + by_mode.quantile(0.25).loc['quickly']) / 2

young_cadence_cutoff = _cadence_cutoff('young')
eldely_cadence_cutoff = _cadence_cutoff('elderly')  # (sic) name kept for downstream cells

On the other hand, I think we should also try to distinguish walking speed through the signal magnitude in the frontal plane, because slow walking may include a longer stance phase, which could lead to overactivity of the hip abductors and potential instability in the frontal plane. So I'm going to see whether this is reflected in the frontal-axis data.

In [ ]:
for object_name in filtered_objects_ADXL345: